{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Extracting /tmp/tensorflow/mnist/input_data\\train-images-idx3-ubyte.gz\n",
      "Extracting /tmp/tensorflow/mnist/input_data\\train-labels-idx1-ubyte.gz\n",
      "Extracting /tmp/tensorflow/mnist/input_data\\t10k-images-idx3-ubyte.gz\n",
      "Extracting /tmp/tensorflow/mnist/input_data\\t10k-labels-idx1-ubyte.gz\n",
      "step 100, entropy loss: 0.874281, l2_loss: 12650.720703, total loss: 1.759831\n",
      "0.75\n",
      "step 200, entropy loss: 0.671824, l2_loss: 12649.354492, total loss: 1.557279\n",
      "0.78\n"
     ]
    }
   ],
   "source": [
    "\"\"\"A very simple MNIST classifier.\n",
    "See extensive documentation at\n",
    "https://www.tensorflow.org/get_started/mnist/beginners\n",
    "\"\"\"\n",
    "from __future__ import absolute_import\n",
    "from __future__ import division\n",
    "from __future__ import print_function\n",
    "\n",
    "import argparse\n",
    "import sys\n",
    "\n",
    "from tensorflow.examples.tutorials.mnist import input_data\n",
    "\n",
    "import tensorflow as tf\n",
    "\n",
    "# Import data\n",
    "data_dir = '/tmp/tensorflow/mnist/input_data'\n",
    "mnist = input_data.read_data_sets(data_dir, one_hot=True)\n",
    "# Define loss and optimizer\n",
    "x = tf.placeholder(tf.float32, [None, 784])\n",
    "y_ = tf.placeholder(tf.float32, [None, 10])\n",
    "learning_rate = tf.placeholder(tf.float32)\n",
    "\n",
    "with tf.name_scope('reshape'):\n",
    "  x_image = tf.reshape(x, [-1, 28, 28, 1])\n",
    "\n",
    "# First convolutional layer - maps one grayscale image to 32 feature maps.\n",
    "with tf.name_scope('conv1'):\n",
    "  shape = [5, 5, 1, 32]\n",
    "  W_conv1 = tf.Variable(tf.truncated_normal(shape, stddev=0.1),\n",
    "                        collections=[tf.GraphKeys.GLOBAL_VARIABLES,'WEIGHTS'])\n",
    "  shape = [32]\n",
    "  b_conv1 = tf.Variable(tf.constant(0.1, shape=shape))\n",
    "  l_conv1 = tf.nn.conv2d(x_image, W_conv1, strides=[1, 1, 1, 1], \n",
    "                         padding='SAME') + b_conv1\n",
    "  h_conv1 = tf.nn.relu(l_conv1)\n",
    "\n",
    "# Pooling layer - downsamples by 2X.\n",
    "with tf.name_scope('pool1'):\n",
    "  h_pool1 = tf.nn.max_pool(h_conv1, ksize=[1, 2, 2, 1],\n",
    "                        strides=[1, 2, 2, 1], padding='VALID')\n",
    "\n",
    "# Second convolutional layer -- maps 32 feature maps to 64.\n",
    "with tf.name_scope('conv2'):\n",
    "  W_conv2 = tf.Variable(tf.truncated_normal([5, 5, 32, 64], stddev=0.1),\n",
    "                        \n",
    "                        collections=[tf.GraphKeys.GLOBAL_VARIABLES,'WEIGHTS'])\n",
    "  b_conv2 = tf.Variable(tf.constant(0.1, shape=[64]))\n",
    "  l_conv2 = tf.nn.conv2d(h_pool1, W_conv2, strides=[1, 1, 1, 1], \n",
    "                         padding='SAME') + b_conv2\n",
    "  h_conv2 = tf.nn.relu(l_conv2)\n",
    "\n",
    "# Second pooling layer.\n",
    "with tf.name_scope('pool2'):\n",
    "  h_pool2 = tf.nn.max_pool(h_conv2, ksize=[1, 2, 2, 1],\n",
    "                        strides=[1, 2, 2, 1], padding='VALID')\n",
    "\n",
    "# Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image\n",
    "# is down to 7x7x64 feature maps -- maps this to 1024 features.\n",
    "with tf.name_scope('fc1'):\n",
    "  W_fc1 = tf.Variable(tf.truncated_normal([7 * 7 * 64, 1024], stddev=0.1),\n",
    "                      \n",
    "                      collections=[tf.GraphKeys.GLOBAL_VARIABLES,'WEIGHTS'])\n",
    "  b_fc1 = tf.Variable(tf.constant(0.1, shape=[1024]))\n",
    "\n",
    "  h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])\n",
    "  h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)\n",
    "\n",
    "# Dropout - controls the complexity of the model, prevents co-adaptation of\n",
    "# features.\n",
    "with tf.name_scope('dropout'):\n",
    "  keep_prob = tf.placeholder(tf.float32)\n",
    "  h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)\n",
    "\n",
    "# Map the 1024 features to 10 classes, one for each digit\n",
    "with tf.name_scope('fc2'):\n",
    "  W_fc2 = tf.Variable(tf.truncated_normal([1024, 10], stddev=0.1),\n",
    "                      collections=[tf.GraphKeys.GLOBAL_VARIABLES,'WEIGHTS'])\n",
    "  b_fc2 = tf.Variable(tf.constant(0.1, shape=[10]))\n",
    "\n",
    "  y = tf.matmul(h_fc1_drop, W_fc2) + b_fc2\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "# The raw formulation of cross-entropy,\n",
    "#\n",
    "#   tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.nn.softmax(y)),\n",
    "#                                 reduction_indices=[1]))\n",
    "#\n",
    "# can be numerically unstable.\n",
    "#\n",
    "# So here we use tf.nn.softmax_cross_entropy_with_logits on the raw\n",
    "# outputs of 'y', and then average across the batch.\n",
    "cross_entropy = tf.reduce_mean(\n",
    "    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))\n",
    "\n",
    "l2_loss = tf.add_n( [tf.nn.l2_loss(w) for w in tf.get_collection('WEIGHTS')] )\n",
    "total_loss = cross_entropy + 7e-5*l2_loss\n",
    "train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(total_loss)\n",
    "\n",
    "sess = tf.Session()\n",
    "init_op = tf.global_variables_initializer()\n",
    "sess.run(init_op)\n",
    "# Train\n",
    "for step in range(3000):\n",
    "  batch_xs, batch_ys = mnist.train.next_batch(100)\n",
    "  lr = 0.01\n",
    "  _, loss, l2_loss_value, total_loss_value = sess.run(\n",
    "               [train_step, cross_entropy, l2_loss, total_loss], \n",
    "               feed_dict={x: batch_xs, y_: batch_ys, learning_rate:lr, keep_prob:0.5})\n",
    "  \n",
    "  if (step+1) % 100 == 0:\n",
    "    print('step %d, entropy loss: %f, l2_loss: %f, total loss: %f' % \n",
    "            (step+1, loss, l2_loss_value, total_loss_value))\n",
    "    # Test trained model\n",
    "    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))\n",
    "    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n",
    "    print(sess.run(accuracy, feed_dict={x: batch_xs, y_: batch_ys, keep_prob:0.5}))\n",
    "  if (step+1) % 1000 == 0:\n",
    "    print(sess.run(accuracy, feed_dict={x: mnist.test.images,\n",
    "                                    y_: mnist.test.labels, keep_prob:0.5}))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
